import urllib.request
import time
import re
import xlwt

# Accumulates one 5-tuple per movie parsed from the board pages, in regex
# group order: (detail href, title, cast, release time, raw score markup).
movies_info = []

# The request headers and the parsing pattern are the same for every page,
# so they are built once here instead of on every loop iteration.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'}

# Captures, per movie: detail link, title, cast, release time and score.
# The score group spans everything up to '</i></p>', so it still contains
# the '</i><i class="fraction">' markup between the integer and fractional
# parts; that markup is stripped later when the score is converted.
pat = (
    '<p class="name"><a href="(.*?)".*?>(.*?)</a>.*?'
    '<p class="star">(.*?)</p>.*?'
    '<p class="releasetime">(.*?)</p>.*?'
    '<p class="score"><i class="integer">(.*?)</i></p>'
)
# re.S lets '.' match newlines, since one movie entry spans several lines.
movie_re = re.compile(pat, re.S)

# Crawl 10 pages; the board is paged through the `offset` query parameter
# in steps of 10.
for i in range(10):
    print('爬取第%d 页数据'%(i+1))

    # Build the URL for this page.
    url = 'http://maoyan.com/board/4?offset='+str(i*10)
    req = urllib.request.Request(url, headers=headers)

    # Send the request and decode the body. The context manager closes the
    # HTTP response even if reading or decoding fails, and the timeout keeps
    # a stalled connection from hanging the script forever. Network errors
    # are deliberately left to propagate so a partial crawl is never written
    # out as if it were complete.
    with urllib.request.urlopen(req, timeout=30) as res:
        html = res.read().decode('utf-8')
    # NOTE: the number printed here is the length of the HTML text in
    # characters, although the message words it as a record count.
    print('共计爬取数据%d条'%len(html))

    # Extract every movie entry on the page and add it to the result list.
    dlist = movie_re.findall(html)
    print('总共有%d条信息'%len(dlist))
    movies_info.extend(dlist)

    # Pause between requests to reduce the risk of the IP being blocked.
    time.sleep(2)

# Create the workbook and write the header row (row 0) of the sheet.
new_excel = xlwt.Workbook()
new_sheet = new_excel.add_sheet('sheet')
column_titles = ('电影名称', '主演', '上映时间', '评分', '详情链接')
for col, title in enumerate(column_titles):
    new_sheet.write(0, col, title)

print('共获取到%d条数据，将数据写入到表格中。。。'%len(movies_info))

# One spreadsheet row per movie, starting right below the header row.
# Each record is unpacked in the order the fields were captured:
# (detail href, title, cast, release time, raw score markup).
for row, record in enumerate(movies_info, start=1):
    href, name, stars, release, raw_score = record

    # Cast text: drop the newlines and spaces left over from the HTML.
    stars_clean = stars.replace('\n', '').replace(' ', '')
    # Score: remove the markup separating the integer and fractional parts
    # so that the remaining text parses as a float.
    score = float(raw_score.replace('</i><i class="fraction">', ''))
    # The captured href is appended to the site root to form the link.
    detail_url = 'http://maoyan.com' + href

    new_sheet.write(row, 0, name)          # movie title
    new_sheet.write(row, 1, stars_clean)   # cast
    new_sheet.write(row, 2, release)       # release time
    new_sheet.write(row, 3, score)         # score
    new_sheet.write(row, 4, detail_url)    # detail link

new_excel.save(r'猫眼电影Top100_requests.xls')
print('写入完成！')
